################################################################################
## LOAD PACKAGES
################################################################################

library(ggplot2)
library(readxl)
library(dplyr)
library(tidyr)
library(writexl)

################################################################################
## SET WORKING DIRECTORY
################################################################################

# Show the current working directory; the relative "files/" paths below are
# resolved against it.
getwd()

#setwd() # Set working directory here

# Read one correlations workbook and tag every row with the VAA it belongs to.
read_vaa_correlations <- function(path, vaa_label) {
  read_excel(path) %>%
    mutate(vaa = vaa_label)
}

# General and youth VAAs
general.nl.correlations.df <- read_vaa_correlations(
  "files/general_nl_correlations_df.xlsx", "General – Dutch"
)
general.fr.correlations.df <- read_vaa_correlations(
  "files/general_fr_correlations_df.xlsx", "General – French"
)
youth.nl.correlations.df <- read_vaa_correlations(
  "files/youth_nl_correlations_df.xlsx", "Youth – Dutch"
)
youth.fr.correlations.df <- read_vaa_correlations(
  "files/youth_fr_correlations_df.xlsx", "Youth – French"
)

# Regional VAAs
brussels.nl.correlations.df <- read_vaa_correlations(
  "files/brussels_nl_correlations_df.xlsx", "Brussels – Dutch"
)
brussels.fr.correlations.df <- read_vaa_correlations(
  "files/brussels_fr_correlations_df.xlsx", "Brussels – French"
)
flanders.nl.correlations.df <- read_vaa_correlations(
  "files/flanders_nl_correlations_df.xlsx", "Flanders"
)
wallonia.fr.correlations.df <- read_vaa_correlations(
  "files/wallonia_fr_correlations_df.xlsx", "Wallonia"
)

# Federal and EU VAAs
federal.nl.correlations.df <- read_vaa_correlations(
  "files/federal_nl_correlations_df.xlsx", "Federal – Dutch"
)
federal.fr.correlations.df <- read_vaa_correlations(
  "files/federal_fr_correlations_df.xlsx", "Federal – French"
)
eu.nl.correlations.df <- read_vaa_correlations(
  "files/eu_nl_correlations_df.xlsx", "EU – Dutch"
)
eu.fr.correlations.df <- read_vaa_correlations(
  "files/eu_fr_correlations_df.xlsx", "EU – French"
)

# Stack all twelve tables into one data frame (rbind requires matching columns)
correlations.df <- rbind(
  general.nl.correlations.df,
  general.fr.correlations.df,
  youth.nl.correlations.df,
  youth.fr.correlations.df,
  brussels.nl.correlations.df,
  brussels.fr.correlations.df,
  flanders.nl.correlations.df,
  wallonia.fr.correlations.df,
  federal.nl.correlations.df,
  federal.fr.correlations.df,
  eu.nl.correlations.df,
  eu.fr.correlations.df
)

# Keep only the columns used by the wide table and the plot further down
correlations.df <- correlations.df %>%
  select(vaa, theme, fisher_z_corr)

# Reshape to one row per VAA and one column per theme
correlations_wide <- correlations.df %>%
  pivot_wider(
    names_from = theme,
    values_from = fisher_z_corr
  )

# Add a row with the column-wise means.
# The mean is wrapped in an explicit function because passing extra arguments
# through across()'s `...` (e.g. `across(..., mean, na.rm = TRUE)`) is
# deprecated as of dplyr 1.1.0.
correlations_wide_with_avg <- correlations_wide %>%
  bind_rows(
    correlations_wide %>%
      select(-vaa) %>%
      summarise(across(everything(), function(x) mean(x, na.rm = TRUE))) %>%
      mutate(vaa = "Average") %>%
      select(vaa, everything())
  )

# Keep the first column (vaa) in place and sort the theme columns alphabetically
wide_column_names <- names(correlations_wide_with_avg)
correlations_wide_with_avg <- correlations_wide_with_avg[
  , c(wide_column_names[1], sort(wide_column_names[-1]))
]

# Export the wide table, including the "Average" row, to the working directory
write_xlsx(correlations_wide_with_avg, "correlations_wide.xlsx")

# Average Fisher z correlation per theme, sorted from highest to lowest.
# Theme labels are normalised to "Capitalised" form (first letter upper case,
# rest lower case) BEFORE grouping, so that labels differing only in case are
# averaged together instead of ending up as duplicated factor levels, which
# would make factor() fail. tolower(), toupper(), substring() and paste0() are
# all vectorised, so no per-element loop is needed.
correlations.df <- correlations.df %>%
  mutate(
    theme = tolower(as.character(theme)),
    theme = paste0(toupper(substring(theme, 1, 1)), substring(theme, 2))
  ) %>%
  group_by(theme) %>%
  summarise(avg_fisher_z = mean(fisher_z_corr, na.rm = TRUE)) %>%
  arrange(desc(avg_fisher_z))

# Fix the factor levels in the current row order (descending average) so the
# bars in the plot are drawn from highest to lowest correlation.
correlations.df$theme <- factor(
  correlations.df$theme,
  levels = correlations.df$theme
)

# Build the bar chart of average correlation per theme: filled bars with a thin
# black border, no legend, and the rounded value printed above each bar.
# The plot is created before the graphics device is opened so that an error
# while constructing it cannot leave a PNG device open.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for bar
# borders; `size` is kept here so older ggplot2 versions keep working.
correlations_plot <- ggplot(
  correlations.df,
  aes(x = theme, y = avg_fisher_z, fill = theme)
) +
  geom_col(color = "black", size = 0.2) +
  scale_fill_manual(values = c(
    "Crime" = "#42d4f4",
    "Culture" = "#f032e6",
    "Economy" = "#800000",
    "Education" = "#9A6324",
    "Environment" = "#3cb44b",
    "Ethics" = "#f58231",
    "Foreign" = "#ffe119",
    "Housing" = "#000075",
    "Immigration" = "#e6194B",
    "Mobility" = "#4363d8",
    "State" = "#911eb4",
    "Welfare" = "#bfef45"
  )) +
  scale_y_continuous(
    limits = c(0, 0.6),             # Fixed y-axis range
    breaks = seq(0, 0.6, by = 0.1)  # Tick marks every 0.1
  ) +
  theme_minimal() +
  labs(
    x = "Theme",
    y = "Average Correlation"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    axis.text.y = element_text(size = 10),
    axis.title.x = element_blank(),                                  # Hide x-axis title
    axis.title.y = element_text(size = 11, margin = margin(r = 8)),  # Gap between title and axis
    plot.title = element_blank(),                                    # Remove plot title
    legend.position = "none"                                         # Remove legend
  ) +
  # Add value labels above bars
  geom_text(aes(label = round(avg_fisher_z, 2)), vjust = -0.5, size = 3)

# Make sure the output directory exists; png() fails if it is missing
dir.create("figs", showWarnings = FALSE, recursive = TRUE)

# Render to a high-resolution PNG. The plot is printed explicitly because
# ggplot objects are only auto-printed at the interactive top level; when the
# script is run through source() an unprinted plot would leave the file empty.
png("figs/correlations_g.png", units = "in", width = 7, height = 5, res = 1200)
print(correlations_plot)
dev.off()